# Requirement: scrape every image from the "funny pictures" (imgrank) section of Qiushibaike
import requests
import re
import os

if __name__ == '__main__':
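    # Spoof a desktop-browser User-Agent: many sites reject requests that
    # carry the default python-requests UA string.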
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36'
    }
    # Create the output folder if it doesn't already exist
    if not os.path.exists('./qiutuLibs'):
        os.mkdir('./qiutuLibs')
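    # Note: os.makedirs('./qiutuLibs', exist_ok=True) is an equivalent one-liner.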
    # Generic URL template; %d is the page-number placeholder
    url = 'https://www.qiushibaike.com/imgrank/page/%d/'
    for pageNum in range(1, 4):
        # Build the URL for the current page number
        new_url = url % pageNum
        # General-purpose crawl: fetch the entire page's HTML
        page_text = requests.get(url=new_url, headers=headers).text
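        # .text is the body decoded to str via the inferred encoding; the raw
        # bytes are available as .content (used below for the image data).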

        # Focused crawl: parse all image URLs out of the page
        ex = '<div class="thumb">.*?<img src="(.*?)" alt.*?</div>'
        img_src_list = re.findall(ex, page_text, re.S)
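        # re.S (DOTALL) lets '.' match newlines, so the pattern can span the
        # multi-line <div class="thumb"> blocks; with a single capture group,
        # findall returns just the bare src values.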
        print('Downloading page {}, which has {} images'.format(pageNum, len(img_src_list)))

        for src in img_src_list:
            # The scraped src is protocol-relative (starts with //), so prepend the scheme
            src = 'https:' + src
            # Fetch the binary image data
            img_data = requests.get(url=src, headers=headers).content
            # Use the last path segment as the file name
            img_name = src.split('/')[-1]
            with open('./qiutuLibs/{}'.format(img_name), 'wb') as fp:
                fp.write(img_data)
            print(img_name, 'downloaded successfully!')
    print('over!')
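
# Hardening sketch (an assumption, not part of the original script): real runs
# benefit from a per-request timeout and basic error handling, e.g.:
#     try:
#         img_data = requests.get(url=src, headers=headers, timeout=10).content
#     except requests.RequestException as exc:
#         print(src, 'failed:', exc)
#         continue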
